###############################################################
## Liao and McDowell ISQ Multiple Imputation Replication R Code
## 9/25/2013
## R version 3.0.1 (2013-05-16) -- "Good Sport"
## NOTE: The following code implements Multiple Imputation with 
##       Amelia
###############################################################
# clear all objects in workspace
rm(list = ls())

# load library
library(Amelia)
library(parallel)
library(foreign)

# set seed
set.seed(1234)

# set number of cores
cores <- detectCores()

# load data
data <- read.dta("yuan.dta")

# Due to super high collinearity beteween China Statistical Yearbook and UNCTAD data 
# (UNCTAD basically uses Yearbook data), 
# which causes a problem with multiple imputation, and that Yearbook has much better coverage than UNCTAD, 
# we drop UNCTAD FDI variables for China inward and outward flow and stock data
# Due to the same problem, we also drop WDI fdi inflow data from world for partner countries since UNCTAD has slightly better coverage.
myvars.drop <- names(data) %in% c("chfdi_oflo_un","chfdi_osto_un","chfdi_iflo_un", # Yearbook data is better than UNCTAD for China stats
                                  "chwfdi_oflo_un","chwfdi_osto_un","chwfdi_iflo_un", "pwfdi_iflo_wdi", # UNCTAD data is better than WDI for Partner stats
                                  "chfdi_flows_un","chwfdi_flows_un","chfdidep_un","pfdidep_un",
                                  "chfdi_iflo_dep_un","chfdi_oflo_dep_un","pfdi_iflo_dep_un","pfdi_oflo_dep_un",
                                  "chfdi_osto_book","chwfdi_osto_book", # won't be using stock data
                                  "rarearthexp",# drop rarearth since we will not be using it and China is the only main producer
                                  "asean","asean3", # not using ASEAN in model
                                  "fta_neg","fta_cons","bits_sig", # not used in de jure model
                                  "rescur", # not using reserve currency in model
                                  "cn_dias","cn_dias_p", # not using diaspora in model
                                  "colony","comcol","curcol","col45","smctry",#comcol, curcol, col45 don't vary, only Mongolia is former colony of China
                                  "dist","distcap","distwces","distw", # we already have dist_k
                                  "democ","autoc",# we already have polity2
                                  "POP","ppp","rgdpl","rgdpl2","openk","openc" # won't be using these
                                  )
data <- data[!myvars.drop]

# bound partner FDI dependence to 1 for the few outliers
data$pfdidep_book <- ifelse(data$pfdidep_book > 1, 1, data$pfdidep_book)
data$pfdi_iflo_dep_book <- ifelse(data$pfdi_iflo_dep_book > 1, 1, data$pfdi_iflo_dep_book)
data$pfdi_oflo_dep_book <- ifelse(data$pfdi_oflo_dep_book > 1, 1, data$pfdi_oflo_dep_book)

# create bounds for FDI stock dependence variables and gdp growth
# This matrix will have 3 columns: the first is the column for the bounded variable, the second is the lower bound and the third is the upper bound.
bds <- matrix(c(35, 0, 1,
                38, 0, 1, 
                39, 0, 1,
                46, min(data$gdpgrow, na.rm = TRUE)-3, max(data$gdpgrow, na.rm = TRUE)+3), # plus/minus 3 percent max/min gdpgrowth rate
                nrow = 4, ncol = 3, byrow = TRUE) 

# start the clock
startTime <- proc.time()

# start imputation
mi <- amelia(x = data, 
             m = 5,
             idvars = c("country_x"), 
             logs = c("chimp","chexp","pimp","pexp","pwimp","pwexp",
                      "chfdi_iflo_book","pwfdi_oflo_un","chfdi_flows_book","pwfdi_flows_un",
                      "gdp2000","gdpcur","gdppc2000","gdppccur","rgdpch",
                      "oilexp","coalexpval","coalexpwt","copexpval","copexpwt","irnexpval","irnexpwt","potexpval","potexpwt","coal_prod_eia"),
             bounds = bds,
             lgstc = c("chtradedep","ptradedep","chexpdep","chimpdep","pexpdep","pimpdep",
                       "chfdidep_book","chfdi_iflo_dep_book","chfdi_oflo_dep_book"),
             lags = c("chimp","chexp","chwimp","chwexp","pimp","pexp","pwimp","pwexp",
                      "chtradedep","ptradedep","chexpdep","chimpdep","pexpdep","pimpdep",
                      "chfdi_oflo_book","chfdi_iflo_book",
                      "chwfdi_oflo_book","chwfdi_iflo_book","pwfdi_iflo_un","pwfdi_oflo_un",
                      "chfdi_flows_book","chwfdi_flows_book","pwfdi_flows_un",
                      "chfdidep_book","pfdidep_book","chfdi_iflo_dep_book","chfdi_oflo_dep_book","pfdi_iflo_dep_book","pfdi_oflo_dep_book",
                      "gdp2000","gdpcur","gdppc2000","gdppccur","gdpgrow","rgdpch",
                      "oilexp","coalexpval","coalexpwt","copexpval","copexpwt","irnexpval","irnexpwt","potexpval","potexpwt","coal_prod_eia"),
             leads = c("chimp","chexp","chwimp","chwexp","pimp","pexp","pwimp","pwexp",
                       "chtradedep","ptradedep","chexpdep","chimpdep","pexpdep","pimpdep",
                       "chfdi_oflo_book","chfdi_iflo_book",
                       "chwfdi_oflo_book","chwfdi_iflo_book","pwfdi_iflo_un","pwfdi_oflo_un",
                       "chfdi_flows_book","chwfdi_flows_book","pwfdi_flows_un",
                       "chfdidep_book","pfdidep_book","chfdi_iflo_dep_book","chfdi_oflo_dep_book","pfdi_iflo_dep_book","pfdi_oflo_dep_book",
                       "gdp2000","gdpcur","gdppc2000","gdppccur","gdpgrow","rgdpch",
                       "oilexp","coalexpval","coalexpwt","copexpval","copexpwt","irnexpval","irnexpwt","potexpval","potexpwt","coal_prod_eia"),
             ords = c("polity2"),
             cs = "iso3",
             ts = "year",
             empri = .005*nrow(data),
             parallel = "multicore",
             ncpus = cores)
             
# Stop the clock
elapsedTime <- proc.time() - startTime

# start the clock again
startTime2 <- proc.time()

# More MI datasets
mi.more <- amelia(x = data, 
                  m = 5,
                  idvars = c("country_x"), 
                  logs = c("chimp","chexp","pimp","pexp","pwimp","pwexp",
                           "chfdi_iflo_book","pwfdi_oflo_un","chfdi_flows_book","pwfdi_flows_un",
                           "gdp2000","gdpcur","gdppc2000","gdppccur","rgdpch",
                           "oilexp","coalexpval","coalexpwt","copexpval","copexpwt","irnexpval","irnexpwt","potexpval","potexpwt","coal_prod_eia"),
                  bounds = bds,
                  lgstc = c("chtradedep","ptradedep","chexpdep","chimpdep","pexpdep","pimpdep",
                            "chfdidep_book","chfdi_iflo_dep_book","chfdi_oflo_dep_book"),
                  lags = c("chimp","chexp","chwimp","chwexp","pimp","pexp","pwimp","pwexp",
                           "chtradedep","ptradedep","chexpdep","chimpdep","pexpdep","pimpdep",
                           "chfdi_oflo_book","chfdi_iflo_book",
                           "chwfdi_oflo_book","chwfdi_iflo_book","pwfdi_iflo_un","pwfdi_oflo_un",
                           "chfdi_flows_book","chwfdi_flows_book","pwfdi_flows_un",
                           "chfdidep_book","pfdidep_book","chfdi_iflo_dep_book","chfdi_oflo_dep_book","pfdi_iflo_dep_book","pfdi_oflo_dep_book",
                           "gdp2000","gdpcur","gdppc2000","gdppccur","gdpgrow","rgdpch",
                           "oilexp","coalexpval","coalexpwt","copexpval","copexpwt","irnexpval","irnexpwt","potexpval","potexpwt","coal_prod_eia"),
                  leads = c("chimp","chexp","chwimp","chwexp","pimp","pexp","pwimp","pwexp",
                            "chtradedep","ptradedep","chexpdep","chimpdep","pexpdep","pimpdep",
                            "chfdi_oflo_book","chfdi_iflo_book",
                            "chwfdi_oflo_book","chwfdi_iflo_book","pwfdi_iflo_un","pwfdi_oflo_un",
                            "chfdi_flows_book","chwfdi_flows_book","pwfdi_flows_un",
                            "chfdidep_book","pfdidep_book","chfdi_iflo_dep_book","chfdi_oflo_dep_book","pfdi_iflo_dep_book","pfdi_oflo_dep_book",
                            "gdp2000","gdpcur","gdppc2000","gdppccur","gdpgrow","rgdpch",
                            "oilexp","coalexpval","coalexpwt","copexpval","copexpwt","irnexpval","irnexpwt","potexpval","potexpwt","coal_prod_eia"),
                  ords = c("polity2"),
                  cs = "iso3",
                  ts = "year",
                  empri = .005*nrow(data),
                  parallel = "multicore",
                  ncpus = cores)
                  
# Stop the clock
elapsedTime2 <- proc.time() - startTime2

# combine MI datasets
mi.more <- ameliabind(mi, mi.more)
# save output
save(mi.more, file = "mi.rr.RData")

## post imputation transformations
load("mi.rr.RData")
# A pragmatic issue arises if there is missingness in either of the variables that go into the interactions.  
# In that case, it is considered best practice to create the interactions, impute the data, and then recreate the interactions. (James Honaker)
# recreating trade dependence variables
mi.more <- transform(mi.more, chtradedep = ((chimp + chexp)/(chwimp + chwexp)))
mi.more <- transform(mi.more, chexpdep = ((chexp)/(chwimp + chwexp)))
mi.more <- transform(mi.more, chimpdep = ((chimp)/(chwimp + chwexp)))
mi.more <- transform(mi.more, ptradedep = ((pimp + pexp)/(pwimp + pwexp)))
mi.more <- transform(mi.more, pexpdep = ((pexp)/(pwimp + pwexp)))
mi.more <- transform(mi.more, pimpdep = ((pimp)/(pwimp + pwexp)))

# recreate China fdi dependence variables
mi.more <- transform(mi.more, chfdi_flows_book = (abs(chfdi_iflo_book) + abs(chfdi_oflo_book)))
mi.more <- transform(mi.more, chwfdi_flows_book = (abs(chwfdi_iflo_book) + abs(chwfdi_oflo_book)))
mi.more <- transform(mi.more, pwfdi_flows_un = (abs(pwfdi_iflo_un) + abs(pwfdi_oflo_un)))
mi.more <- transform(mi.more, chfdidep_book = chfdi_flows_book/chwfdi_flows_book)
mi.more <- transform(mi.more, pfdidep_book = chfdi_flows_book/pwfdi_flows_un)
mi.more <- transform(mi.more, chfdi_iflo_dep_book = (abs(chfdi_iflo_book))/chwfdi_flows_book)
mi.more <- transform(mi.more, chfdi_oflo_dep_book = (abs(chfdi_oflo_book))/chwfdi_flows_book)
mi.more <- transform(mi.more, pfdi_iflo_dep_book = (abs(chfdi_oflo_book))/pwfdi_flows_un)
mi.more <- transform(mi.more, pfdi_oflo_dep_book = (abs(chfdi_iflo_book))/pwfdi_flows_un)

# creating centered trade dependence variables
mi.more <- transform(mi.more, ptradedep_cen = scale(ptradedep, center = TRUE, scale = FALSE))
mi.more <- transform(mi.more, chtradedep_cen = scale(chtradedep, center = TRUE, scale = FALSE))
mi.more <- transform(mi.more, chexpdep_cen = scale(chexpdep, center = TRUE, scale = FALSE))
mi.more <- transform(mi.more, chimpdep_cen = scale(chimpdep, center = TRUE, scale = FALSE))
mi.more <- transform(mi.more, pexpdep_cen = scale(pexpdep, center = TRUE, scale = FALSE))
mi.more <- transform(mi.more, pimpdep_cen = scale(pimpdep, center = TRUE, scale = FALSE))
mi.more <- transform(mi.more, ptradedep_cen_scale = scale(ptradedep, center = TRUE, scale = TRUE))
mi.more <- transform(mi.more, chtradedep_cen_scale = scale(chtradedep, center = TRUE, scale = TRUE))
mi.more <- transform(mi.more, chexpdep_cen_scale = scale(chexpdep, center = TRUE, scale = TRUE))
mi.more <- transform(mi.more, chimpdep_cen_scale = scale(chimpdep, center = TRUE, scale = TRUE))
mi.more <- transform(mi.more, pexpdep_cen_scale = scale(pexpdep, center = TRUE, scale = TRUE))
mi.more <- transform(mi.more, pimpdep_cen_scale = scale(pimpdep, center = TRUE, scale = TRUE))

# creating centered FDI dependence variables
mi.more <- transform(mi.more, pfdidep_book_cen = scale(pfdidep_book, center = TRUE, scale = FALSE))
mi.more <- transform(mi.more, chfdidep_book_cen = scale(chfdidep_book, center = TRUE, scale = FALSE))
mi.more <- transform(mi.more, pfdidep_book_cen_scale = scale(pfdidep_book, center = TRUE, scale = TRUE))
mi.more <- transform(mi.more, chfdidep_book_cen_scale = scale(chfdidep_book, center = TRUE, scale = TRUE))
#summary(mi.more)

# save mi datasets as a list in RData format (version R&R and post-transformation)
save(mi.more, file = "mi.rr.pt.RData")
# write mi datasets as separate Stata files
write.amelia(mi.more, file.stem = "mi.rr.pt", extension = ".dta", format = "dta")